Load exploration
# Exploratory scatter plots of waist circumference (BMXWAIST) against BMXHT,
# RIDAGEYR and weight (BMXWT), coloured by sex (RIAGENDR).
# qplot() is deprecated in ggplot2, so the plots are built with ggplot()
# directly; alpha is set as a constant (not mapped) to reduce overplotting.
ggplot(data, aes(x = BMXHT, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

ggplot(data, aes(x = RIDAGEYR, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

ggplot(data, aes(x = BMXWT, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

Regression models
test_df <- test_data |> select(-BMXWAIST)

#' Fit a linear model, print its summary, and plot test-set predictions.
#'
#' @param formula_str Model formula as a string, e.g. "BMXWAIST ~ BMXWT".
#' @param train_data_set Data frame the model is fitted on.
#' @param test_data_set Data frame the predictions are made for. The default
#'   (`test_df`) has the response column removed; in that case the observed
#'   values are taken from the global `test_data`.
#' @return A ggplot object: observed waist values in black, fitted values
#'   coloured by sex, with the test-set MSE in the title.
run_model <- function(formula_str, train_data_set = train_data,
                      test_data_set = test_df) {
  # Fit the regression on the training data and report the fit.
  lm_reg <- lm(formula = as.formula(formula_str), data = train_data_set)
  print(summary(lm_reg))

  # Predict on the test set that was passed in. The global `test_df` used to
  # be hard-coded here, so the `test_data_set` argument was silently ignored.
  lm_pred <- predict(lm_reg, newdata = test_data_set, se.fit = TRUE)

  # Take plotting columns from the supplied test set when it has them, and
  # fall back to the global `test_data` otherwise (the default test set has
  # no BMXWAIST column, so the observed values come from `test_data`).
  pick_column <- function(col) {
    if (col %in% names(test_data_set)) {
      test_data_set[[col]]
    } else {
      test_data[[col]]
    }
  }

  # Collect fitted values next to the columns needed for plotting.
  pred_df <- data.frame(
    fit = lm_pred$fit,
    weight = pick_column("BMXWT"),
    sex = pick_column("RIAGENDR"),
    label = pick_column("BMXWAIST")
  )

  # Test-set error; mean_square_error() is defined outside this block.
  mse <- mean_square_error(pred_df$fit, pred_df$label)

  # Observed values in black, fitted values coloured by sex. alpha is a
  # constant, so it is set outside aes(); inside aes() it was treated as a
  # mapped variable, which added a meaningless legend and rescaled the value.
  ggplot(pred_df, aes(x = weight, y = label)) +
    geom_point(colour = "black", alpha = 0.1) +
    geom_point(aes(x = weight, y = fit, colour = sex),
               alpha = 0.1, size = 1.5) +
    ylab("waist circumference") +
    ggtitle(paste("MSE = ", mse))
}
Regression models: weight
# Baseline: waist circumference as a straight-line function of weight (BMXWT).
run_model("BMXWAIST ~ BMXWT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.9731 -4.9555 -0.3079 4.9367 29.6161
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.014485 0.416713 105.6 <2e-16 ***
## BMXWT 0.679405 0.004972 136.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.186 on 4998 degrees of freedom
## Multiple R-squared: 0.7888, Adjusted R-squared: 0.7888
## F-statistic: 1.867e+04 on 1 and 4998 DF, p-value: < 2.2e-16

# Replace the linear weight term with a B-spline basis. With default
# arguments bs() is cubic with no interior knots (3 basis columns).
# The warning printed after the summary comes from evaluating the basis on
# values outside the boundary knots.
run_model("BMXWAIST ~ bs(BMXWT)")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.8911 -4.9106 -0.2857 4.9437 29.0958
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.6842 0.8579 74.23 <2e-16 ***
## bs(BMXWT)1 41.7630 2.4755 16.87 <2e-16 ***
## bs(BMXWT)2 75.0338 1.9124 39.23 <2e-16 ***
## bs(BMXWT)3 99.9140 3.2283 30.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.154 on 4996 degrees of freedom
## Multiple R-squared: 0.7908, Adjusted R-squared: 0.7906
## F-statistic: 6294 on 3 and 4996 DF, p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Linear weight plus a 7-df B-spline in RIDAGEYR, sex (RIAGENDR) and the
# survey-cycle factor `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.4842 -3.9345 -0.0767 3.9626 28.7827
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.757744 0.667719 56.547 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 0.868468 0.981494 0.885 0.37628
## bs(RIDAGEYR, df = 7)2 0.991158 0.676366 1.465 0.14287
## bs(RIDAGEYR, df = 7)3 2.294245 0.766176 2.994 0.00276 **
## bs(RIDAGEYR, df = 7)4 4.669268 0.682401 6.842 8.73e-12 ***
## bs(RIDAGEYR, df = 7)5 9.482672 0.857544 11.058 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 9.862797 0.937849 10.516 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 12.172864 1.098358 11.083 < 2e-16 ***
## BMXWT 0.723016 0.004455 162.280 < 2e-16 ***
## RIAGENDRMale -4.103855 0.179771 -22.828 < 2e-16 ***
## years2005-2006 -0.017005 0.370898 -0.046 0.96343
## years2007-2008 0.064039 0.347084 0.185 0.85362
## years2009-2010 -0.202011 0.345229 -0.585 0.55847
## years2013-2014 0.313321 0.359356 0.872 0.38331
## years2015-2016 1.091476 0.351646 3.104 0.00192 **
## years2017-2018 0.354006 0.365483 0.969 0.33279
## years2022-2012 -0.257131 0.363763 -0.707 0.47968
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.064 on 4983 degrees of freedom
## Multiple R-squared: 0.8501, Adjusted R-squared: 0.8496
## F-statistic: 1766 on 16 and 4983 DF, p-value: < 2.2e-16

# Same model with ns() in place of bs() for RIDAGEYR
# (presumably splines::ns, a natural cubic spline — confirm).
run_model("BMXWAIST ~ ns(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.5761 -3.9386 -0.0617 3.9344 28.7022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.815716 0.585583 64.578 < 2e-16 ***
## ns(RIDAGEYR, df = 7)1 1.266134 0.560684 2.258 0.023977 *
## ns(RIDAGEYR, df = 7)2 2.621256 0.699723 3.746 0.000182 ***
## ns(RIDAGEYR, df = 7)3 4.773123 0.634407 7.524 6.28e-14 ***
## ns(RIDAGEYR, df = 7)4 6.761823 0.623801 10.840 < 2e-16 ***
## ns(RIDAGEYR, df = 7)5 9.518455 0.604527 15.745 < 2e-16 ***
## ns(RIDAGEYR, df = 7)6 11.743722 1.084512 10.829 < 2e-16 ***
## ns(RIDAGEYR, df = 7)7 10.489553 0.665906 15.752 < 2e-16 ***
## BMXWT 0.723027 0.004455 162.300 < 2e-16 ***
## RIAGENDRMale -4.107483 0.179788 -22.846 < 2e-16 ***
## years2005-2006 -0.014358 0.370921 -0.039 0.969123
## years2007-2008 0.031140 0.346287 0.090 0.928351
## years2009-2010 -0.242658 0.344236 -0.705 0.480895
## years2013-2014 0.284803 0.358807 0.794 0.427379
## years2015-2016 1.047251 0.350770 2.986 0.002844 **
## years2017-2018 0.326165 0.364711 0.894 0.371199
## years2022-2012 -0.282681 0.363222 -0.778 0.436454
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.064 on 4983 degrees of freedom
## Multiple R-squared: 0.8501, Adjusted R-squared: 0.8496
## F-statistic: 1766 on 16 and 4983 DF, p-value: < 2.2e-16

# Spline terms for both RIDAGEYR and weight, plus sex and linear BMXHT;
# the `years` factor is left out.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT) + RIAGENDR + BMXHT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.6985 -3.2686 0.0629 3.2866 30.0961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 126.51084 1.81254 69.798 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 0.76657 0.83547 0.918 0.35891
## bs(RIDAGEYR, df = 7)2 0.72922 0.57508 1.268 0.20485
## bs(RIDAGEYR, df = 7)3 2.05288 0.65192 3.149 0.00165 **
## bs(RIDAGEYR, df = 7)4 3.58750 0.57951 6.191 6.48e-10 ***
## bs(RIDAGEYR, df = 7)5 7.62294 0.72950 10.450 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.89324 0.79475 9.932 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.23133 0.93061 10.994 < 2e-16 ***
## bs(BMXWT)1 55.10227 1.81280 30.396 < 2e-16 ***
## bs(BMXWT)2 88.47071 1.40174 63.115 < 2e-16 ***
## bs(BMXWT)3 115.44075 2.34801 49.165 < 2e-16 ***
## RIAGENDRMale 0.81086 0.19962 4.062 4.94e-05 ***
## BMXHT -0.45093 0.01077 -41.878 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.161 on 4987 degrees of freedom
## Multiple R-squared: 0.8913, Adjusted R-squared: 0.891
## F-statistic: 3408 on 12 and 4987 DF, p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Linear weight and linear BMXHT with the RIDAGEYR spline, sex and `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.3755 -3.3134 0.0594 3.2993 29.1634
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 103.443472 1.775803 58.252 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 1.092503 0.858321 1.273 0.203135
## bs(RIDAGEYR, df = 7)2 0.749962 0.591504 1.268 0.204896
## bs(RIDAGEYR, df = 7)3 2.328956 0.670010 3.476 0.000513 ***
## bs(RIDAGEYR, df = 7)4 3.920739 0.597056 6.567 5.67e-11 ***
## bs(RIDAGEYR, df = 7)5 7.883671 0.751020 10.497 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.415141 0.820967 10.250 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 9.872650 0.962292 10.260 < 2e-16 ***
## BMXWT 0.788170 0.004236 186.047 < 2e-16 ***
## RIAGENDRMale 1.041177 0.204865 5.082 3.87e-07 ***
## BMXHT -0.433459 0.011067 -39.167 < 2e-16 ***
## years2005-2006 -0.050623 0.324346 -0.156 0.875979
## years2007-2008 -0.209820 0.303600 -0.691 0.489531
## years2009-2010 -0.429730 0.301954 -1.423 0.154751
## years2013-2014 -0.028985 0.314373 -0.092 0.926544
## years2015-2016 0.244278 0.308269 0.792 0.428156
## years2017-2018 -0.304234 0.320051 -0.951 0.341865
## years2022-2012 -0.394561 0.318125 -1.240 0.214934
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.303 on 4982 degrees of freedom
## Multiple R-squared: 0.8854, Adjusted R-squared: 0.885
## F-statistic: 2263 on 17 and 4982 DF, p-value: < 2.2e-16

# As the previous call, but weight is passed through invNorm().
# invNorm() is defined outside this view; presumably an inverse-normal
# transform — confirm before interpreting the coefficient scale.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + invNorm(BMXWT) + RIAGENDR + BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.3084 -3.5766 -0.0816 3.4916 30.7634
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 170.02722 1.99757 85.117 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 0.34416 0.90701 0.379 0.70437
## bs(RIDAGEYR, df = 7)2 0.62591 0.62513 1.001 0.31675
## bs(RIDAGEYR, df = 7)3 1.85213 0.70813 2.616 0.00894 **
## bs(RIDAGEYR, df = 7)4 3.13323 0.63107 4.965 7.10e-07 ***
## bs(RIDAGEYR, df = 7)5 7.49316 0.79377 9.440 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 6.97959 0.86756 8.045 1.07e-15 ***
## bs(RIDAGEYR, df = 7)7 10.74823 1.01711 10.567 < 2e-16 ***
## invNorm(BMXWT) 16.18351 0.09271 174.563 < 2e-16 ***
## RIAGENDRMale 0.30981 0.21636 1.432 0.15224
## BMXHT -0.44434 0.01174 -37.863 < 2e-16 ***
## years2005-2006 -0.37844 0.34279 -1.104 0.26964
## years2007-2008 -0.19696 0.32084 -0.614 0.53932
## years2009-2010 -0.06324 0.31908 -0.198 0.84289
## years2013-2014 0.33212 0.33221 1.000 0.31749
## years2015-2016 0.57295 0.32571 1.759 0.07863 .
## years2017-2018 0.13966 0.33812 0.413 0.67959
## years2022-2012 -0.35054 0.33619 -1.043 0.29715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.604 on 4982 degrees of freedom
## Multiple R-squared: 0.872, Adjusted R-squared: 0.8715
## F-statistic: 1996 on 17 and 4982 DF, p-value: < 2.2e-16

# invNorm() applied to BMXHT instead of weight; weight stays linear.
# invNorm() is defined outside this view.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + invNorm(BMXHT) + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.5708 -3.3475 0.0416 3.3121 28.7053
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.158108 0.610040 51.076 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 1.044600 0.860906 1.213 0.225045
## bs(RIDAGEYR, df = 7)2 0.739225 0.593293 1.246 0.212835
## bs(RIDAGEYR, df = 7)3 2.297568 0.672032 3.419 0.000634 ***
## bs(RIDAGEYR, df = 7)4 3.898765 0.598883 6.510 8.25e-11 ***
## bs(RIDAGEYR, df = 7)5 7.871112 0.753328 10.448 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.424714 0.823451 10.231 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 9.805344 0.965342 10.157 < 2e-16 ***
## BMXWT 0.786882 0.004243 185.468 < 2e-16 ***
## RIAGENDRMale 0.835417 0.202937 4.117 3.91e-05 ***
## invNorm(BMXHT) -4.324482 0.111848 -38.664 < 2e-16 ***
## years2005-2006 -0.049070 0.325325 -0.151 0.880113
## years2007-2008 -0.201421 0.304514 -0.661 0.508354
## years2009-2010 -0.431643 0.302867 -1.425 0.154165
## years2013-2014 -0.028603 0.315324 -0.091 0.927728
## years2015-2016 0.258252 0.309190 0.835 0.403615
## years2017-2018 -0.276951 0.320990 -0.863 0.388287
## years2022-2012 -0.398972 0.319087 -1.250 0.211229
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.319 on 4982 degrees of freedom
## Multiple R-squared: 0.8847, Adjusted R-squared: 0.8843
## F-statistic: 2248 on 17 and 4982 DF, p-value: < 2.2e-16

# Spline terms for RIDAGEYR and weight, sex, linear BMXHT, and the `years`
# factor.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT) + RIAGENDR + BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.5990 -3.2740 0.0656 3.2732 30.2603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 126.28530 1.83903 68.670 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 0.80575 0.83563 0.964 0.33497
## bs(RIDAGEYR, df = 7)2 0.71904 0.57565 1.249 0.21170
## bs(RIDAGEYR, df = 7)3 2.08042 0.65222 3.190 0.00143 **
## bs(RIDAGEYR, df = 7)4 3.60169 0.58128 6.196 6.25e-10 ***
## bs(RIDAGEYR, df = 7)5 7.64678 0.73095 10.461 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.88009 0.79947 9.857 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.28021 0.93670 10.975 < 2e-16 ***
## bs(BMXWT)1 55.21868 1.81434 30.435 < 2e-16 ***
## bs(BMXWT)2 88.36788 1.40271 62.998 < 2e-16 ***
## bs(BMXWT)3 115.51207 2.34967 49.161 < 2e-16 ***
## RIAGENDRMale 0.79282 0.19998 3.965 7.46e-05 ***
## BMXHT -0.44914 0.01081 -41.553 < 2e-16 ***
## years2005-2006 -0.19173 0.31571 -0.607 0.54367
## years2007-2008 -0.20056 0.29545 -0.679 0.49729
## years2009-2010 -0.30785 0.29390 -1.047 0.29495
## years2013-2014 0.08846 0.30598 0.289 0.77252
## years2015-2016 0.31072 0.30003 1.036 0.30044
## years2017-2018 -0.17265 0.31153 -0.554 0.57947
## years2022-2012 -0.40186 0.30955 -1.298 0.19428
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.16 on 4980 degrees of freedom
## Multiple R-squared: 0.8915, Adjusted R-squared: 0.8911
## F-statistic: 2154 on 19 and 4980 DF, p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# 7-df B-splines for RIDAGEYR, weight and BMXHT, plus sex and `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT,df=7) + RIAGENDR + bs(BMXHT,df=7) + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.8374 -3.2431 0.0935 3.2656 30.1903
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.2612 4.3252 15.089 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 0.8267 0.8361 0.989 0.322810
## bs(RIDAGEYR, df = 7)2 0.7281 0.5764 1.263 0.206507
## bs(RIDAGEYR, df = 7)3 2.0879 0.6524 3.200 0.001381 **
## bs(RIDAGEYR, df = 7)4 3.6066 0.5817 6.200 6.11e-10 ***
## bs(RIDAGEYR, df = 7)5 7.7005 0.7318 10.522 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.8766 0.8003 9.842 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.2975 0.9383 10.974 < 2e-16 ***
## bs(BMXWT, df = 7)1 13.6283 3.7862 3.600 0.000322 ***
## bs(BMXWT, df = 7)2 26.3346 2.4649 10.684 < 2e-16 ***
## bs(BMXWT, df = 7)3 40.4432 2.7961 14.464 < 2e-16 ***
## bs(BMXWT, df = 7)4 50.3226 2.6827 18.758 < 2e-16 ***
## bs(BMXWT, df = 7)5 79.0649 2.9208 27.069 < 2e-16 ***
## bs(BMXWT, df = 7)6 103.9789 3.2701 31.797 < 2e-16 ***
## bs(BMXWT, df = 7)7 115.4465 3.9069 29.550 < 2e-16 ***
## RIAGENDRMale 0.8196 0.2071 3.958 7.66e-05 ***
## bs(BMXHT, df = 7)1 -6.6685 4.6565 -1.432 0.152178
## bs(BMXHT, df = 7)2 -9.2048 3.1063 -2.963 0.003059 **
## bs(BMXHT, df = 7)3 -14.8818 3.4781 -4.279 1.92e-05 ***
## bs(BMXHT, df = 7)4 -17.1775 3.3611 -5.111 3.33e-07 ***
## bs(BMXHT, df = 7)5 -23.9234 3.5467 -6.745 1.70e-11 ***
## bs(BMXHT, df = 7)6 -29.5770 3.6498 -8.104 6.65e-16 ***
## bs(BMXHT, df = 7)7 -28.6051 4.3907 -6.515 7.99e-11 ***
## years2005-2006 -0.1834 0.3159 -0.580 0.561636
## years2007-2008 -0.1952 0.2956 -0.660 0.509010
## years2009-2010 -0.3012 0.2940 -1.024 0.305695
## years2013-2014 0.0740 0.3064 0.242 0.809162
## years2015-2016 0.3089 0.3003 1.029 0.303729
## years2017-2018 -0.1768 0.3118 -0.567 0.570570
## years2022-2012 -0.4078 0.3098 -1.316 0.188148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.159 on 4970 degrees of freedom
## Multiple R-squared: 0.8918, Adjusted R-squared: 0.8911
## F-statistic: 1412 on 29 and 4970 DF, p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = c(`20%` = 64.1, `40%` = 73.7, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases

# Let the RIDAGEYR spline differ by sex: `*` expands to both main effects
# plus their interaction terms.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.421 -3.240 0.117 3.274 30.388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 127.38026 1.87551 67.918 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 1.61607 1.12738 1.433 0.15179
## bs(RIDAGEYR, df = 7)2 0.07844 0.78498 0.100 0.92040
## bs(RIDAGEYR, df = 7)3 1.53459 0.89048 1.723 0.08489 .
## bs(RIDAGEYR, df = 7)4 2.14530 0.79534 2.697 0.00701 **
## bs(RIDAGEYR, df = 7)5 6.20633 1.00302 6.188 6.60e-10 ***
## bs(RIDAGEYR, df = 7)6 6.17625 1.10113 5.609 2.15e-08 ***
## bs(RIDAGEYR, df = 7)7 9.04269 1.28625 7.030 2.34e-12 ***
## RIAGENDRMale -1.05000 0.89340 -1.175 0.23994
## bs(BMXWT)1 55.15674 1.80515 30.555 < 2e-16 ***
## bs(BMXWT)2 88.50642 1.39368 63.506 < 2e-16 ***
## bs(BMXWT)3 115.28861 2.33313 49.414 < 2e-16 ***
## BMXHT -0.45072 0.01074 -41.966 < 2e-16 ***
## years2005-2006 -0.21290 0.31349 -0.679 0.49708
## years2007-2008 -0.19869 0.29351 -0.677 0.49847
## years2009-2010 -0.30419 0.29194 -1.042 0.29749
## years2013-2014 0.13765 0.30399 0.453 0.65070
## years2015-2016 0.36156 0.29805 1.213 0.22515
## years2017-2018 -0.18806 0.30942 -0.608 0.54334
## years2022-2012 -0.31883 0.30754 -1.037 0.29992
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale -1.72731 1.66430 -1.038 0.29938
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale 1.42978 1.14516 1.249 0.21189
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale 1.24676 1.29715 0.961 0.33652
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale 3.07599 1.15245 2.669 0.00763 **
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale 3.06589 1.44962 2.115 0.03448 *
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale 3.47578 1.57629 2.205 0.02750 *
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale 2.85646 1.84529 1.548 0.12169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.121 on 4973 degrees of freedom
## Multiple R-squared: 0.8933, Adjusted R-squared: 0.8927
## F-statistic: 1601 on 26 and 4973 DF, p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# NOTE: RIAGENDR is already a main effect of the `*` term, so the extra
# "+ RIAGENDR" is redundant; this fit is identical to the preceding one.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + RIAGENDR+ BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.421 -3.240 0.117 3.274 30.388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 127.38026 1.87551 67.918 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 1.61607 1.12738 1.433 0.15179
## bs(RIDAGEYR, df = 7)2 0.07844 0.78498 0.100 0.92040
## bs(RIDAGEYR, df = 7)3 1.53459 0.89048 1.723 0.08489 .
## bs(RIDAGEYR, df = 7)4 2.14530 0.79534 2.697 0.00701 **
## bs(RIDAGEYR, df = 7)5 6.20633 1.00302 6.188 6.60e-10 ***
## bs(RIDAGEYR, df = 7)6 6.17625 1.10113 5.609 2.15e-08 ***
## bs(RIDAGEYR, df = 7)7 9.04269 1.28625 7.030 2.34e-12 ***
## RIAGENDRMale -1.05000 0.89340 -1.175 0.23994
## bs(BMXWT)1 55.15674 1.80515 30.555 < 2e-16 ***
## bs(BMXWT)2 88.50642 1.39368 63.506 < 2e-16 ***
## bs(BMXWT)3 115.28861 2.33313 49.414 < 2e-16 ***
## BMXHT -0.45072 0.01074 -41.966 < 2e-16 ***
## years2005-2006 -0.21290 0.31349 -0.679 0.49708
## years2007-2008 -0.19869 0.29351 -0.677 0.49847
## years2009-2010 -0.30419 0.29194 -1.042 0.29749
## years2013-2014 0.13765 0.30399 0.453 0.65070
## years2015-2016 0.36156 0.29805 1.213 0.22515
## years2017-2018 -0.18806 0.30942 -0.608 0.54334
## years2022-2012 -0.31883 0.30754 -1.037 0.29992
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale -1.72731 1.66430 -1.038 0.29938
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale 1.42978 1.14516 1.249 0.21189
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale 1.24676 1.29715 0.961 0.33652
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale 3.07599 1.15245 2.669 0.00763 **
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale 3.06589 1.44962 2.115 0.03448 *
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale 3.47578 1.57629 2.205 0.02750 *
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale 2.85646 1.84529 1.548 0.12169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.121 on 4973 degrees of freedom
## Multiple R-squared: 0.8933, Adjusted R-squared: 0.8927
## F-statistic: 1601 on 26 and 4973 DF, p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# RIDAGEYR-by-sex interaction model without the `years` factor.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + BMXHT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.5356 -3.2189 0.1307 3.2779 30.2036
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 127.6224 1.8521 68.908 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 1.6055 1.1274 1.424 0.15449
## bs(RIDAGEYR, df = 7)2 0.1002 0.7841 0.128 0.89830
## bs(RIDAGEYR, df = 7)3 1.5198 0.8904 1.707 0.08792 .
## bs(RIDAGEYR, df = 7)4 2.1726 0.7939 2.736 0.00623 **
## bs(RIDAGEYR, df = 7)5 6.1933 1.0023 6.179 6.96e-10 ***
## bs(RIDAGEYR, df = 7)6 6.2261 1.0965 5.678 1.44e-08 ***
## bs(RIDAGEYR, df = 7)7 8.9828 1.2824 7.005 2.81e-12 ***
## RIAGENDRMale -0.9844 0.8928 -1.103 0.27023
## bs(BMXWT)1 55.0165 1.8038 30.500 < 2e-16 ***
## bs(BMXWT)2 88.6177 1.3929 63.622 < 2e-16 ***
## bs(BMXWT)3 115.1933 2.3317 49.403 < 2e-16 ***
## BMXHT -0.4526 0.0107 -42.295 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale -1.8019 1.6641 -1.083 0.27895
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale 1.4186 1.1449 1.239 0.21539
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale 1.2026 1.2966 0.927 0.35372
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale 3.0045 1.1525 2.607 0.00916 **
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale 3.0226 1.4490 2.086 0.03704 *
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale 3.4198 1.5763 2.170 0.03009 *
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale 2.8243 1.8454 1.530 0.12596
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.123 on 4980 degrees of freedom
## Multiple R-squared: 0.8931, Adjusted R-squared: 0.8927
## F-statistic: 2189 on 19 and 4980 DF, p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# grid.arrange(g1, g2,g3, nrow=3)
Regression models with BMI
# Baseline using BMI (BMXBMI) as the only, linear, predictor.
run_model("BMXWAIST ~ BMXBMI")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.262 -4.663 0.094 4.540 27.853
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.72136 0.45795 78 <2e-16 ***
## BMXBMI 2.19416 0.01545 142 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.969 on 4998 degrees of freedom
## Multiple R-squared: 0.8014, Adjusted R-squared: 0.8014
## F-statistic: 2.017e+04 on 1 and 4998 DF, p-value: < 2.2e-16

# Weight and BMXHT as separate linear terms, for comparison with the
# BMI-only model.
run_model("BMXWAIST ~ BMXWT + BMXHT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.6397 -3.9707 -0.0834 3.9635 27.4920
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109.134705 1.446598 75.44 <2e-16 ***
## BMXWT 0.785554 0.004746 165.52 <2e-16 ***
## BMXHT -0.440891 0.009506 -46.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.009 on 4997 degrees of freedom
## Multiple R-squared: 0.8524, Adjusted R-squared: 0.8523
## F-statistic: 1.443e+04 on 2 and 4997 DF, p-value: < 2.2e-16

# Weight, BMXHT and BMI together.
# NOTE(review): BMI is presumably derived from weight and height, so these
# predictors are strongly related — interpret individual coefficients with care.
run_model("BMXWAIST ~ BMXWT + BMXHT + BMXBMI")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.1689 -3.9453 -0.0781 3.9420 26.8295
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.38133 6.54753 11.207 < 2e-16 ***
## BMXWT 0.56787 0.03917 14.497 < 2e-16 ***
## BMXHT -0.22789 0.03921 -5.812 6.56e-09 ***
## BMXBMI 0.61556 0.10996 5.598 2.28e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.991 on 4996 degrees of freedom
## Multiple R-squared: 0.8533, Adjusted R-squared: 0.8532
## F-statistic: 9687 on 3 and 4996 DF, p-value: < 2.2e-16

# 7-df B-spline in BMI as the only predictor.
run_model("BMXWAIST ~ bs(BMXBMI,df=7)")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.3967 -4.4326 0.0962 4.3794 30.6575
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.241 4.453 14.652 < 2e-16 ***
## bs(BMXBMI, df = 7)1 2.454 6.003 0.409 0.682710
## bs(BMXBMI, df = 7)2 14.433 4.133 3.492 0.000483 ***
## bs(BMXBMI, df = 7)3 30.037 4.561 6.585 5.01e-11 ***
## bs(BMXBMI, df = 7)4 37.338 4.427 8.434 < 2e-16 ***
## bs(BMXBMI, df = 7)5 63.708 4.688 13.589 < 2e-16 ***
## bs(BMXBMI, df = 7)6 82.629 5.042 16.390 < 2e-16 ***
## bs(BMXBMI, df = 7)7 90.871 5.749 15.807 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.796 on 4992 degrees of freedom
## Multiple R-squared: 0.8113, Adjusted R-squared: 0.8111
## F-statistic: 3067 on 7 and 4992 DF, p-value: < 2.2e-16

# BMI spline interacted with sex (main effects plus interaction terms).
run_model("BMXWAIST ~ bs(BMXBMI,df=7)*RIAGENDR")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.4443 -4.0031 -0.0805 3.9834 31.5606
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.7504 4.7206 13.505 < 2e-16 ***
## bs(BMXBMI, df = 7)1 2.6174 6.3436 0.413 0.679911
## bs(BMXBMI, df = 7)2 16.1627 4.4240 3.653 0.000261 ***
## bs(BMXBMI, df = 7)3 28.0847 4.8487 5.792 7.37e-09 ***
## bs(BMXBMI, df = 7)4 36.0256 4.7012 7.663 2.17e-14 ***
## bs(BMXBMI, df = 7)5 61.0694 4.9964 12.223 < 2e-16 ***
## bs(BMXBMI, df = 7)6 78.6538 5.4181 14.517 < 2e-16 ***
## bs(BMXBMI, df = 7)7 94.7887 5.9872 15.832 < 2e-16 ***
## RIAGENDRMale 6.8027 9.0898 0.748 0.454262
## bs(BMXBMI, df = 7)1:RIAGENDRMale -5.4408 12.3183 -0.442 0.658736
## bs(BMXBMI, df = 7)2:RIAGENDRMale -5.4968 8.3808 -0.656 0.511931
## bs(BMXBMI, df = 7)3:RIAGENDRMale -0.9294 9.3070 -0.100 0.920459
## bs(BMXBMI, df = 7)4:RIAGENDRMale -1.5407 9.0282 -0.171 0.864506
## bs(BMXBMI, df = 7)5:RIAGENDRMale 3.4297 9.5507 0.359 0.719534
## bs(BMXBMI, df = 7)6:RIAGENDRMale 13.8396 10.1699 1.361 0.173624
## bs(BMXBMI, df = 7)7:RIAGENDRMale -18.3918 12.2089 -1.506 0.132020
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.141 on 4984 degrees of freedom
## Multiple R-squared: 0.8462, Adjusted R-squared: 0.8458
## F-statistic: 1828 on 15 and 4984 DF, p-value: < 2.2e-16

# Add a 7-df B-spline in RIDAGEYR to the BMI-by-sex model.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.3130 -3.6361 0.0063 3.5849 31.5368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.39297 4.44137 12.472 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 0.87715 0.93204 0.941 0.34670
## bs(RIDAGEYR, df = 7)2 0.72428 0.64121 1.130 0.25872
## bs(RIDAGEYR, df = 7)3 2.47406 0.72685 3.404 0.00067 ***
## bs(RIDAGEYR, df = 7)4 3.10688 0.64641 4.806 1.58e-06 ***
## bs(RIDAGEYR, df = 7)5 6.74146 0.81262 8.296 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 6.36109 0.88538 7.185 7.74e-13 ***
## bs(RIDAGEYR, df = 7)7 8.53823 1.03397 8.258 < 2e-16 ***
## bs(BMXBMI, df = 7)1 10.07970 5.94613 1.695 0.09011 .
## bs(BMXBMI, df = 7)2 21.37312 4.14741 5.153 2.66e-07 ***
## bs(BMXBMI, df = 7)3 32.90595 4.54424 7.241 5.13e-13 ***
## bs(BMXBMI, df = 7)4 40.81712 4.40679 9.262 < 2e-16 ***
## bs(BMXBMI, df = 7)5 66.73384 4.68411 14.247 < 2e-16 ***
## bs(BMXBMI, df = 7)6 83.75404 5.07768 16.495 < 2e-16 ***
## bs(BMXBMI, df = 7)7 102.11470 5.61013 18.202 < 2e-16 ***
## RIAGENDRMale 9.78478 8.52047 1.148 0.25087
## bs(BMXBMI, df = 7)1:RIAGENDRMale -9.88677 11.54803 -0.856 0.39196
## bs(BMXBMI, df = 7)2:RIAGENDRMale -8.20193 7.85434 -1.044 0.29642
## bs(BMXBMI, df = 7)3:RIAGENDRMale -4.10178 8.72403 -0.470 0.63825
## bs(BMXBMI, df = 7)4:RIAGENDRMale -4.34299 8.46311 -0.513 0.60786
## bs(BMXBMI, df = 7)5:RIAGENDRMale -0.03424 8.94972 -0.004 0.99695
## bs(BMXBMI, df = 7)6:RIAGENDRMale 12.45905 9.53162 1.307 0.19123
## bs(BMXBMI, df = 7)7:RIAGENDRMale -24.06824 11.43548 -2.105 0.03537 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.743 on 4977 degrees of freedom
## Multiple R-squared: 0.8657, Adjusted R-squared: 0.8651
## F-statistic: 1458 on 22 and 4977 DF, p-value: < 2.2e-16

# Same model with the default bs() basis (3 columns) for RIDAGEYR instead
# of df = 7.
run_model("BMXWAIST ~ bs(RIDAGEYR) + bs(BMXBMI,df=7)*RIAGENDR")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.1655 -3.6332 -0.0267 3.5886 31.4559
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.5200 4.4293 12.535 < 2e-16 ***
## bs(RIDAGEYR)1 0.9191 0.9001 1.021 0.3072
## bs(RIDAGEYR)2 5.0388 0.6750 7.465 9.76e-14 ***
## bs(RIDAGEYR)3 7.6400 0.6017 12.698 < 2e-16 ***
## bs(BMXBMI, df = 7)1 10.3968 5.9419 1.750 0.0802 .
## bs(BMXBMI, df = 7)2 21.5491 4.1450 5.199 2.09e-07 ***
## bs(BMXBMI, df = 7)3 33.1417 4.5410 7.298 3.37e-13 ***
## bs(BMXBMI, df = 7)4 41.0256 4.4040 9.315 < 2e-16 ***
## bs(BMXBMI, df = 7)5 66.9871 4.6807 14.311 < 2e-16 ***
## bs(BMXBMI, df = 7)6 83.9822 5.0748 16.549 < 2e-16 ***
## bs(BMXBMI, df = 7)7 102.2660 5.6078 18.236 < 2e-16 ***
## RIAGENDRMale 10.2573 8.5133 1.205 0.2283
## bs(BMXBMI, df = 7)1:RIAGENDRMale -10.5877 11.5377 -0.918 0.3588
## bs(BMXBMI, df = 7)2:RIAGENDRMale -8.5860 7.8486 -1.094 0.2740
## bs(BMXBMI, df = 7)3:RIAGENDRMale -4.6051 8.7161 -0.528 0.5973
## bs(BMXBMI, df = 7)4:RIAGENDRMale -4.7986 8.4561 -0.567 0.5704
## bs(BMXBMI, df = 7)5:RIAGENDRMale -0.5467 8.9420 -0.061 0.9513
## bs(BMXBMI, df = 7)6:RIAGENDRMale 12.0413 9.5257 1.264 0.2063
## bs(BMXBMI, df = 7)7:RIAGENDRMale -24.5772 11.4264 -2.151 0.0315 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.742 on 4981 degrees of freedom
## Multiple R-squared: 0.8656, Adjusted R-squared: 0.8651
## F-statistic: 1783 on 18 and 4981 DF, p-value: < 2.2e-16

# RIDAGEYR spline and BMI-by-sex terms plus a linear BMXHT term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXHT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.2939 -3.2046 0.0333 3.3150 26.7614
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.875590 4.314325 0.203 0.839183
## bs(RIDAGEYR, df = 7)1 0.826852 0.840143 0.984 0.325075
## bs(RIDAGEYR, df = 7)2 0.707601 0.577984 1.224 0.220913
## bs(RIDAGEYR, df = 7)3 2.189631 0.655235 3.342 0.000839 ***
## bs(RIDAGEYR, df = 7)4 3.666038 0.582904 6.289 3.46e-10 ***
## bs(RIDAGEYR, df = 7)5 7.722513 0.733063 10.535 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.888814 0.799351 9.869 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.795058 0.934394 11.553 < 2e-16 ***
## bs(BMXBMI, df = 7)1 10.202093 5.359830 1.903 0.057041 .
## bs(BMXBMI, df = 7)2 20.427526 3.738574 5.464 4.88e-08 ***
## bs(BMXBMI, df = 7)3 33.195442 4.096175 8.104 6.63e-16 ***
## bs(BMXBMI, df = 7)4 41.028370 3.972275 10.329 < 2e-16 ***
## bs(BMXBMI, df = 7)5 65.950832 4.222311 15.620 < 2e-16 ***
## bs(BMXBMI, df = 7)6 84.272078 4.577035 18.412 < 2e-16 ***
## bs(BMXBMI, df = 7)7 100.886308 5.057092 19.949 < 2e-16 ***
## RIAGENDRMale 7.454725 7.680646 0.971 0.331802
## BMXHT 0.337095 0.009943 33.903 < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.172793 10.409587 -1.169 0.242305
## bs(BMXBMI, df = 7)2:RIAGENDRMale -9.659010 7.080017 -1.364 0.172546
## bs(BMXBMI, df = 7)3:RIAGENDRMale -6.575357 7.864158 -0.836 0.403129
## bs(BMXBMI, df = 7)4:RIAGENDRMale -7.038814 7.629044 -0.923 0.356243
## bs(BMXBMI, df = 7)5:RIAGENDRMale -1.216254 8.067339 -0.151 0.880169
## bs(BMXBMI, df = 7)6:RIAGENDRMale 5.405456 8.594300 0.629 0.529405
## bs(BMXBMI, df = 7)7:RIAGENDRMale -16.081951 10.310608 -1.560 0.118883
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.176 on 4976 degrees of freedom
## Multiple R-squared: 0.8909, Adjusted R-squared: 0.8904
## F-statistic: 1767 on 23 and 4976 DF, p-value: < 2.2e-16

# BMXHT entered as a default bs() basis (3 columns) instead of a linear term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + bs(BMXHT)")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.1739 -3.2061 0.0263 3.3132 26.9184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.6088 4.1843 10.661 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 0.8339 0.8402 0.992 0.321019
## bs(RIDAGEYR, df = 7)2 0.6918 0.5782 1.196 0.231568
## bs(RIDAGEYR, df = 7)3 2.1998 0.6552 3.357 0.000793 ***
## bs(RIDAGEYR, df = 7)4 3.6646 0.5830 6.285 3.55e-10 ***
## bs(RIDAGEYR, df = 7)5 7.7241 0.7332 10.535 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.8780 0.7993 9.856 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.8106 0.9349 11.563 < 2e-16 ***
## bs(BMXBMI, df = 7)1 10.3177 5.3599 1.925 0.054289 .
## bs(BMXBMI, df = 7)2 20.4662 3.7386 5.474 4.61e-08 ***
## bs(BMXBMI, df = 7)3 33.2882 4.0962 8.127 5.53e-16 ***
## bs(BMXBMI, df = 7)4 41.0916 3.9722 10.345 < 2e-16 ***
## bs(BMXBMI, df = 7)5 66.0708 4.2227 15.647 < 2e-16 ***
## bs(BMXBMI, df = 7)6 84.2406 4.5768 18.406 < 2e-16 ***
## bs(BMXBMI, df = 7)7 101.0498 5.0577 19.979 < 2e-16 ***
## RIAGENDRMale 7.6526 7.6834 0.996 0.319301
## bs(BMXHT)1 12.1370 2.6968 4.501 6.93e-06 ***
## bs(BMXHT)2 14.5437 1.1485 12.663 < 2e-16 ***
## bs(BMXHT)3 26.7484 2.2894 11.684 < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.3965 10.4125 -1.191 0.233889
## bs(BMXBMI, df = 7)2:RIAGENDRMale -9.7275 7.0820 -1.374 0.169644
## bs(BMXBMI, df = 7)3:RIAGENDRMale -6.7228 7.8659 -0.855 0.392767
## bs(BMXBMI, df = 7)4:RIAGENDRMale -7.1456 7.6307 -0.936 0.349099
## bs(BMXBMI, df = 7)5:RIAGENDRMale -1.3972 8.0700 -0.173 0.862554
## bs(BMXBMI, df = 7)6:RIAGENDRMale 5.4456 8.5969 0.633 0.526477
## bs(BMXBMI, df = 7)7:RIAGENDRMale -16.4055 10.3130 -1.591 0.111727
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.176 on 4974 degrees of freedom
## Multiple R-squared: 0.891, Adjusted R-squared: 0.8904
## F-statistic: 1626 on 25 and 4974 DF, p-value: < 2.2e-16

# Variant: use weight (linear BMXWT) instead of height alongside the age and
# BMI-by-sex splines. Per the summary below, the residual standard error drops
# slightly to 5.146 (adjusted R-squared 0.8917).
run_model(
  formula_str = "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT"
)
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.2407 -3.2202 0.0431 3.2603 28.5771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.827914 3.995947 10.718 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 0.755474 0.835178 0.905 0.36574
## bs(RIDAGEYR, df = 7)2 0.588719 0.574577 1.025 0.30560
## bs(RIDAGEYR, df = 7)3 2.025429 0.651431 3.109 0.00189 **
## bs(RIDAGEYR, df = 7)4 3.504859 0.579334 6.050 1.56e-09 ***
## bs(RIDAGEYR, df = 7)5 7.559613 0.728533 10.376 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.792854 0.794413 9.810 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.327018 0.927916 11.129 < 2e-16 ***
## bs(BMXBMI, df = 7)1 7.771601 5.328521 1.458 0.14477
## bs(BMXBMI, df = 7)2 14.121786 3.722130 3.794 0.00015 ***
## bs(BMXBMI, df = 7)3 21.612942 4.084717 5.291 1.27e-07 ***
## bs(BMXBMI, df = 7)4 26.747375 3.969215 6.739 1.78e-11 ***
## bs(BMXBMI, df = 7)5 39.967693 4.266501 9.368 < 2e-16 ***
## bs(BMXBMI, df = 7)6 49.332357 4.655212 10.597 < 2e-16 ***
## bs(BMXBMI, df = 7)7 53.818924 5.213349 10.323 < 2e-16 ***
## RIAGENDRMale 9.428689 7.634898 1.235 0.21691
## BMXWT 0.345833 0.009891 34.965 < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.337911 10.348010 -1.192 0.23320
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.659676 7.038343 -1.515 0.12996
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.026813 7.818092 -1.027 0.30461
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.312059 7.584820 -1.228 0.21961
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.362121 8.020482 -0.544 0.58655
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.172570 8.555814 -0.605 0.54549
## bs(BMXBMI, df = 7)7:RIAGENDRMale -12.865402 10.251928 -1.255 0.20956
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.146 on 4976 degrees of freedom
## Multiple R-squared: 0.8922, Adjusted R-squared: 0.8917
## F-statistic: 1790 on 23 and 4976 DF, p-value: < 2.2e-16

# Variant: spline terms for both weight and height on top of the BMI spline.
# Per the summary below, the fit barely moves (residual SE 5.145, adjusted
# R-squared 0.8917), while the BMI main-effect spline terms lose significance
# and their standard errors grow several-fold.
# NOTE(review): presumably collinearity between BMI, weight and height - confirm.
# This run also emits a bs() warning that some BMXWT values lie beyond the
# boundary knots (presumably rows seen at prediction time - confirm).
run_model(
  formula_str = "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + bs(BMXWT) + bs(BMXHT)"
)
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.2422 -3.2052 0.0235 3.2620 28.9613
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.3741 10.5729 5.237 1.70e-07 ***
## bs(RIDAGEYR, df = 7)1 0.7798 0.8353 0.934 0.35058
## bs(RIDAGEYR, df = 7)2 0.6117 0.5754 1.063 0.28777
## bs(RIDAGEYR, df = 7)3 2.0287 0.6518 3.112 0.00187 **
## bs(RIDAGEYR, df = 7)4 3.5268 0.5800 6.081 1.28e-09 ***
## bs(RIDAGEYR, df = 7)5 7.5847 0.7291 10.403 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.7977 0.7947 9.812 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.3700 0.9311 11.137 < 2e-16 ***
## bs(BMXBMI, df = 7)1 6.0488 8.1714 0.740 0.45919
## bs(BMXBMI, df = 7)2 9.9793 13.5390 0.737 0.46111
## bs(BMXBMI, df = 7)3 15.1616 20.2024 0.750 0.45300
## bs(BMXBMI, df = 7)4 18.9148 23.7638 0.796 0.42610
## bs(BMXBMI, df = 7)5 27.4902 36.1524 0.760 0.44705
## bs(BMXBMI, df = 7)6 34.0036 41.2106 0.825 0.40934
## bs(BMXBMI, df = 7)7 37.8790 47.9520 0.790 0.42960
## RIAGENDRMale 9.9193 7.7092 1.287 0.19826
## bs(BMXWT)1 29.6369 37.2567 0.795 0.42637
## bs(BMXWT)2 52.4851 40.0618 1.310 0.19022
## bs(BMXWT)3 71.9854 53.6215 1.342 0.17950
## bs(BMXHT)1 1.2665 10.4929 0.121 0.90393
## bs(BMXHT)2 -5.9958 18.4955 -0.324 0.74582
## bs(BMXHT)3 -6.8691 25.4183 -0.270 0.78698
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.5355 10.3758 -1.208 0.22705
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.9102 7.1122 -1.534 0.12509
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.5410 7.8976 -1.081 0.27954
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.8170 7.6744 -1.279 0.20089
## bs(BMXBMI, df = 7)5:RIAGENDRMale -5.1220 8.0992 -0.632 0.52714
## bs(BMXBMI, df = 7)6:RIAGENDRMale -4.4335 8.8738 -0.500 0.61737
## bs(BMXBMI, df = 7)7:RIAGENDRMale -14.0453 10.4057 -1.350 0.17715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.145 on 4971 degrees of freedom
## Multiple R-squared: 0.8923, Adjusted R-squared: 0.8917
## F-statistic: 1471 on 28 and 4971 DF, p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Variant: linear weight and height plus the survey-cycle factor `years`.
# None of the `years` coefficients in the summary below is significant
# (all p > 0.2) and the residual standard error stays at 5.145.
# NOTE(review): the output lists a level named "2022-2012", which looks like a
# mislabelled "2011-2012" in the data preparation - confirm upstream.
run_model(
  formula_str = "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT + years"
)
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.1584 -3.2198 0.0602 3.2325 28.7521
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.798939 6.932160 6.174 7.19e-10 ***
## bs(RIDAGEYR, df = 7)1 0.788208 0.835502 0.943 0.345525
## bs(RIDAGEYR, df = 7)2 0.580327 0.575441 1.008 0.313268
## bs(RIDAGEYR, df = 7)3 2.049186 0.652095 3.142 0.001685 **
## bs(RIDAGEYR, df = 7)4 3.520726 0.581640 6.053 1.53e-09 ***
## bs(RIDAGEYR, df = 7)5 7.581963 0.730590 10.378 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.781874 0.799491 9.734 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.363690 0.936983 11.061 < 2e-16 ***
## bs(BMXBMI, df = 7)1 7.370197 5.341725 1.380 0.167728
## bs(BMXBMI, df = 7)2 13.812297 3.807579 3.628 0.000289 ***
## bs(BMXBMI, df = 7)3 21.394652 4.341517 4.928 8.58e-07 ***
## bs(BMXBMI, df = 7)4 26.478233 4.362825 6.069 1.38e-09 ***
## bs(BMXBMI, df = 7)5 39.821874 5.381110 7.400 1.59e-13 ***
## bs(BMXBMI, df = 7)6 49.088536 6.418745 7.648 2.44e-14 ***
## bs(BMXBMI, df = 7)7 53.680178 7.901873 6.793 1.22e-11 ***
## RIAGENDRMale 8.763768 7.654206 1.145 0.252280
## BMXWT 0.344164 0.044793 7.683 1.85e-14 ***
## BMXHT 0.003086 0.044785 0.069 0.945061
## years2005-2006 -0.167660 0.314911 -0.532 0.594469
## years2007-2008 -0.182922 0.294811 -0.620 0.534975
## years2009-2010 -0.298889 0.293273 -1.019 0.308181
## years2013-2014 0.085926 0.305622 0.281 0.778606
## years2015-2016 0.302430 0.299460 1.010 0.312585
## years2017-2018 -0.225789 0.311232 -0.725 0.468198
## years2022-2012 -0.376678 0.309125 -1.219 0.223081
## bs(BMXBMI, df = 7)1:RIAGENDRMale -11.528412 10.365887 -1.112 0.266128
## bs(BMXBMI, df = 7)2:RIAGENDRMale -9.978568 7.052975 -1.415 0.157189
## bs(BMXBMI, df = 7)3:RIAGENDRMale -7.428316 7.834622 -0.948 0.343104
## bs(BMXBMI, df = 7)4:RIAGENDRMale -8.626758 7.604263 -1.134 0.256655
## bs(BMXBMI, df = 7)5:RIAGENDRMale -3.837996 8.043396 -0.477 0.633268
## bs(BMXBMI, df = 7)6:RIAGENDRMale -4.263056 8.668944 -0.492 0.622909
## bs(BMXBMI, df = 7)7:RIAGENDRMale -12.218217 10.265995 -1.190 0.234039
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.145 on 4968 degrees of freedom
## Multiple R-squared: 0.8924, Adjusted R-squared: 0.8917
## F-statistic: 1329 on 31 and 4968 DF, p-value: < 2.2e-16

# Base model: age spline, BMI spline interacted with sex, plus linear weight
# and height. Per the summary below: residual SE 5.146, adjusted R-squared
# 0.8917; the BMXHT coefficient is indistinguishable from zero (p = 0.985).
run_model(
  formula_str = "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT"
)
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.2366 -3.2207 0.0431 3.2608 28.5724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.272e+01 6.919e+00 6.175 7.16e-10 ***
## bs(RIDAGEYR, df = 7)1 7.556e-01 8.353e-01 0.905 0.365712
## bs(RIDAGEYR, df = 7)2 5.890e-01 5.748e-01 1.025 0.305583
## bs(RIDAGEYR, df = 7)3 2.026e+00 6.518e-01 3.108 0.001893 **
## bs(RIDAGEYR, df = 7)4 3.505e+00 5.799e-01 6.045 1.61e-09 ***
## bs(RIDAGEYR, df = 7)5 7.560e+00 7.291e-01 10.369 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.793e+00 7.948e-01 9.805 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 1.033e+01 9.309e-01 11.095 < 2e-16 ***
## bs(BMXBMI, df = 7)1 7.777e+00 5.338e+00 1.457 0.145187
## bs(BMXBMI, df = 7)2 1.414e+01 3.805e+00 3.715 0.000206 ***
## bs(BMXBMI, df = 7)3 2.164e+01 4.340e+00 4.987 6.35e-07 ***
## bs(BMXBMI, df = 7)4 2.678e+01 4.360e+00 6.142 8.80e-10 ***
## bs(BMXBMI, df = 7)5 4.003e+01 5.379e+00 7.441 1.17e-13 ***
## bs(BMXBMI, df = 7)6 4.941e+01 6.416e+00 7.702 1.61e-14 ***
## bs(BMXBMI, df = 7)7 5.393e+01 7.899e+00 6.827 9.72e-12 ***
## RIAGENDRMale 9.424e+00 7.640e+00 1.233 0.217465
## BMXWT 3.450e-01 4.477e-02 7.707 1.54e-14 ***
## BMXHT 8.164e-04 4.474e-02 0.018 0.985441
## bs(BMXBMI, df = 7)1:RIAGENDRMale -1.234e+01 1.035e+01 -1.192 0.233253
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.066e+01 7.040e+00 -1.514 0.130127
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.024e+00 7.821e+00 -1.026 0.304956
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.307e+00 7.590e+00 -1.226 0.220190
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.355e+00 8.031e+00 -0.542 0.587641
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.149e+00 8.653e+00 -0.595 0.551848
## bs(BMXBMI, df = 7)7:RIAGENDRMale -1.287e+01 1.026e+01 -1.255 0.209654
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.146 on 4975 degrees of freedom
## Multiple R-squared: 0.8922, Adjusted R-squared: 0.8917
## F-statistic: 1716 on 24 and 4975 DF, p-value: < 2.2e-16

# grid.arrange(g1, g2,g3, nrow=3)
Regression models: base model plus one additional variable
# Formula prefix shared by the single-covariate experiments that follow.
# It ends in " + " so that one extra term can be appended with paste0().
base_form <- "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT + "

# Candidate covariate: MFA 16:1 (Hexadecenoic) (gm).
# Per the summary below it is not significant (p = 0.403) and the residual
# standard error stays at 5.146.
run_model(formula_str = paste0(base_form, "DR1TM161"))
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.1862 -3.2077 0.0296 3.2544 28.5658
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.675648 6.919932 6.167 7.51e-10 ***
## bs(RIDAGEYR, df = 7)1 0.760276 0.835350 0.910 0.362799
## bs(RIDAGEYR, df = 7)2 0.593572 0.574871 1.033 0.301874
## bs(RIDAGEYR, df = 7)3 2.022479 0.651802 3.103 0.001927 **
## bs(RIDAGEYR, df = 7)4 3.503722 0.579908 6.042 1.63e-09 ***
## bs(RIDAGEYR, df = 7)5 7.534474 0.729768 10.324 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.766219 0.795478 9.763 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.295923 0.931764 11.050 < 2e-16 ***
## bs(BMXBMI, df = 7)1 7.772783 5.338092 1.456 0.145429
## bs(BMXBMI, df = 7)2 14.132997 3.805517 3.714 0.000206 ***
## bs(BMXBMI, df = 7)3 21.641396 4.339704 4.987 6.35e-07 ***
## bs(BMXBMI, df = 7)4 26.787103 4.360559 6.143 8.73e-10 ***
## bs(BMXBMI, df = 7)5 40.044374 5.379198 7.444 1.14e-13 ***
## bs(BMXBMI, df = 7)6 49.445696 6.416055 7.707 1.55e-14 ***
## bs(BMXBMI, df = 7)7 53.940241 7.899506 6.828 9.62e-12 ***
## RIAGENDRMale 9.525850 7.641438 1.247 0.212601
## BMXWT 0.344828 0.044768 7.702 1.60e-14 ***
## BMXHT 0.001652 0.044748 0.037 0.970555
## DR1TM161 -0.070643 0.084413 -0.837 0.402703
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.414530 10.349769 -1.199 0.230391
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.731526 7.040788 -1.524 0.127524
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.101492 7.821453 -1.036 0.300344
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.384214 7.591157 -1.236 0.216441
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.434286 8.031539 -0.552 0.580898
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.224766 8.654076 -0.604 0.546048
## bs(BMXBMI, df = 7)7:RIAGENDRMale -12.972825 10.260101 -1.264 0.206147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.146 on 4974 degrees of freedom
## Multiple R-squared: 0.8922, Adjusted R-squared: 0.8917
## F-statistic: 1647 on 25 and 4974 DF, p-value: < 2.2e-16

# Histogram of the raw DR1TM161 values in `data`.
# hist() builds its default title and x-axis label from the argument
# expression, so these calls are deliberately left exactly as written.
hist(data$DR1TM161)

# Histogram of the same column after passing it through invNorm().
# NOTE(review): invNorm() is defined outside this section - presumably an
# inverse-normal transform; confirm.
hist(invNorm(data$DR1TM161))

# Candidate covariate: DR1TM161 after the invNorm() transform.
# Per the summary below it remains non-significant (p = 0.950); residual SE
# is 5.147.
# NOTE(review): invNorm() sits inside the formula, so it is re-evaluated on
# whatever data predict() receives; if it is rank-based, train and test rows
# are transformed on different scales - confirm.
run_model(formula_str = paste0(base_form, "invNorm(DR1TM161)"))
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.2321 -3.2255 0.0421 3.2613 28.5730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.271e+01 6.924e+00 6.169 7.44e-10 ***
## bs(RIDAGEYR, df = 7)1 7.566e-01 8.355e-01 0.906 0.365227
## bs(RIDAGEYR, df = 7)2 5.891e-01 5.749e-01 1.025 0.305537
## bs(RIDAGEYR, df = 7)3 2.026e+00 6.518e-01 3.108 0.001895 **
## bs(RIDAGEYR, df = 7)4 3.505e+00 5.800e-01 6.043 1.62e-09 ***
## bs(RIDAGEYR, df = 7)5 7.559e+00 7.296e-01 10.360 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.791e+00 7.955e-01 9.794 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 1.033e+01 9.317e-01 11.083 < 2e-16 ***
## bs(BMXBMI, df = 7)1 7.780e+00 5.339e+00 1.457 0.145082
## bs(BMXBMI, df = 7)2 1.414e+01 3.806e+00 3.715 0.000206 ***
## bs(BMXBMI, df = 7)3 2.164e+01 4.340e+00 4.986 6.36e-07 ***
## bs(BMXBMI, df = 7)4 2.678e+01 4.361e+00 6.141 8.82e-10 ***
## bs(BMXBMI, df = 7)5 4.003e+01 5.380e+00 7.441 1.17e-13 ***
## bs(BMXBMI, df = 7)6 4.942e+01 6.417e+00 7.701 1.62e-14 ***
## bs(BMXBMI, df = 7)7 5.393e+01 7.900e+00 6.826 9.75e-12 ***
## RIAGENDRMale 9.434e+00 7.643e+00 1.234 0.217105
## BMXWT 3.450e-01 4.477e-02 7.706 1.55e-14 ***
## BMXHT 8.895e-04 4.476e-02 0.020 0.984144
## invNorm(DR1TM161) -4.888e-03 7.734e-02 -0.063 0.949614
## bs(BMXBMI, df = 7)1:RIAGENDRMale -1.235e+01 1.035e+01 -1.193 0.232943
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.067e+01 7.042e+00 -1.515 0.129940
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.033e+00 7.823e+00 -1.027 0.304544
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.316e+00 7.592e+00 -1.227 0.219878
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.364e+00 8.033e+00 -0.543 0.586939
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.158e+00 8.655e+00 -0.596 0.551250
## bs(BMXBMI, df = 7)7:RIAGENDRMale -1.288e+01 1.026e+01 -1.255 0.209407
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.147 on 4974 degrees of freedom
## Multiple R-squared: 0.8922, Adjusted R-squared: 0.8917
## F-statistic: 1647 on 25 and 4974 DF, p-value: < 2.2e-16

# Candidate covariate: WTDRD1 - dietary day one sample weight.
# Per the summary below it is not significant (p = 0.265) and the residual
# standard error stays at 5.146.
run_model(formula_str = paste0(base_form, "WTDRD1"))
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.3057 -3.1911 0.0446 3.2660 28.5400
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.240e+01 6.925e+00 6.123 9.88e-10 ***
## bs(RIDAGEYR, df = 7)1 7.668e-01 8.353e-01 0.918 0.358683
## bs(RIDAGEYR, df = 7)2 5.884e-01 5.748e-01 1.024 0.306089
## bs(RIDAGEYR, df = 7)3 2.027e+00 6.518e-01 3.111 0.001876 **
## bs(RIDAGEYR, df = 7)4 3.522e+00 5.801e-01 6.072 1.36e-09 ***
## bs(RIDAGEYR, df = 7)5 7.530e+00 7.296e-01 10.321 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.793e+00 7.948e-01 9.806 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 1.030e+01 9.314e-01 11.054 < 2e-16 ***
## bs(BMXBMI, df = 7)1 7.828e+00 5.338e+00 1.466 0.142603
## bs(BMXBMI, df = 7)2 1.420e+01 3.806e+00 3.732 0.000192 ***
## bs(BMXBMI, df = 7)3 2.170e+01 4.340e+00 5.001 5.90e-07 ***
## bs(BMXBMI, df = 7)4 2.684e+01 4.361e+00 6.156 8.04e-10 ***
## bs(BMXBMI, df = 7)5 4.012e+01 5.380e+00 7.459 1.03e-13 ***
## bs(BMXBMI, df = 7)6 4.955e+01 6.417e+00 7.721 1.38e-14 ***
## bs(BMXBMI, df = 7)7 5.402e+01 7.899e+00 6.838 8.99e-12 ***
## RIAGENDRMale 9.449e+00 7.640e+00 1.237 0.216222
## BMXWT 3.442e-01 4.477e-02 7.688 1.79e-14 ***
## BMXHT 3.263e-03 4.479e-02 0.073 0.941932
## WTDRD1 -1.753e-06 1.572e-06 -1.115 0.264801
## bs(BMXBMI, df = 7)1:RIAGENDRMale -1.243e+01 1.035e+01 -1.201 0.229720
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.072e+01 7.040e+00 -1.523 0.127890
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.066e+00 7.821e+00 -1.031 0.302415
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.355e+00 7.590e+00 -1.232 0.217847
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.329e+00 8.031e+00 -0.539 0.589897
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.372e+00 8.655e+00 -0.621 0.534860
## bs(BMXBMI, df = 7)7:RIAGENDRMale -1.261e+01 1.026e+01 -1.229 0.219120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.146 on 4974 degrees of freedom
## Multiple R-squared: 0.8922, Adjusted R-squared: 0.8917
## F-statistic: 1647 on 25 and 4974 DF, p-value: < 2.2e-16

# Candidate covariate: BMXARML - Upper Arm Length (cm).
# Per the summary below it is not significant (p = 0.378) and the residual
# standard error stays at 5.146.
run_model(formula_str = paste0(base_form, "BMXARML"))
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.3497 -3.2137 0.0284 3.2656 28.6088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.66512 6.91996 6.166 7.58e-10 ***
## bs(RIDAGEYR, df = 7)1 0.74740 0.83538 0.895 0.371001
## bs(RIDAGEYR, df = 7)2 0.59229 0.57485 1.030 0.302906
## bs(RIDAGEYR, df = 7)3 2.02701 0.65179 3.110 0.001882 **
## bs(RIDAGEYR, df = 7)4 3.51399 0.57998 6.059 1.47e-09 ***
## bs(RIDAGEYR, df = 7)5 7.58918 0.72986 10.398 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 7.84555 0.79702 9.844 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.38092 0.93285 11.128 < 2e-16 ***
## bs(BMXBMI, df = 7)1 7.73798 5.33823 1.450 0.147250
## bs(BMXBMI, df = 7)2 14.13971 3.80549 3.716 0.000205 ***
## bs(BMXBMI, df = 7)3 21.67687 4.33987 4.995 6.09e-07 ***
## bs(BMXBMI, df = 7)4 26.83560 4.36097 6.154 8.17e-10 ***
## bs(BMXBMI, df = 7)5 40.14602 5.38080 7.461 1.01e-13 ***
## bs(BMXBMI, df = 7)6 49.49629 6.41658 7.714 1.47e-14 ***
## bs(BMXBMI, df = 7)7 54.17270 7.90433 6.854 8.08e-12 ***
## RIAGENDRMale 9.33984 7.64100 1.222 0.221640
## BMXWT 0.34521 0.04477 7.711 1.50e-14 ***
## BMXHT 0.01029 0.04601 0.224 0.823111
## BMXARML -0.04286 0.04859 -0.882 0.377731
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.25223 10.34974 -1.184 0.236541
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.55376 7.04116 -1.499 0.133972
## bs(BMXBMI, df = 7)3:RIAGENDRMale -7.93088 7.82155 -1.014 0.310643
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.21597 7.59124 -1.214 0.224795
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.27931 8.03138 -0.533 0.594179
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.03157 8.65456 -0.581 0.561012
## bs(BMXBMI, df = 7)7:RIAGENDRMale -12.88972 10.25933 -1.256 0.209034
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.146 on 4974 degrees of freedom
## Multiple R-squared: 0.8922, Adjusted R-squared: 0.8917
## F-statistic: 1647 on 25 and 4974 DF, p-value: < 2.2e-16

# Candidate covariate: BMXLEG - Upper Leg Length (cm).
# Per the summary below this is the one addition that helps: coefficient
# -0.431 (p < 2e-16), residual SE down to 5.036, adjusted R-squared 0.8962,
# and BMXHT becomes significant (p = 0.003) once leg length is included.
run_model(formula_str = paste0(base_form, "BMXLEG"))
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.7138 -3.2677 0.0116 3.2808 28.9013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.21014 6.77594 5.787 7.62e-09 ***
## bs(RIDAGEYR, df = 7)1 0.72294 0.81748 0.884 0.376549
## bs(RIDAGEYR, df = 7)2 0.53882 0.56257 0.958 0.338219
## bs(RIDAGEYR, df = 7)3 1.51739 0.63878 2.375 0.017565 *
## bs(RIDAGEYR, df = 7)4 2.89230 0.56901 5.083 3.85e-07 ***
## bs(RIDAGEYR, df = 7)5 6.65949 0.71612 9.299 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 6.75420 0.78098 8.648 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 9.78833 0.91179 10.735 < 2e-16 ***
## bs(BMXBMI, df = 7)1 7.03414 5.22425 1.346 0.178221
## bs(BMXBMI, df = 7)2 13.58603 3.72437 3.648 0.000267 ***
## bs(BMXBMI, df = 7)3 21.51386 4.24697 5.066 4.22e-07 ***
## bs(BMXBMI, df = 7)4 26.46936 4.26741 6.203 6.00e-10 ***
## bs(BMXBMI, df = 7)5 39.90578 5.26421 7.581 4.08e-14 ***
## bs(BMXBMI, df = 7)6 48.62258 6.27904 7.744 1.16e-14 ***
## bs(BMXBMI, df = 7)7 55.14802 7.73111 7.133 1.12e-12 ***
## RIAGENDRMale 9.33699 7.47718 1.249 0.211822
## BMXWT 0.33556 0.04382 7.659 2.25e-14 ***
## BMXHT 0.13076 0.04465 2.929 0.003418 **
## BMXLEG -0.43111 0.02904 -14.844 < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.02113 10.12821 -1.187 0.235325
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.61473 6.88977 -1.541 0.123466
## bs(BMXBMI, df = 7)3:RIAGENDRMale -7.89461 7.65376 -1.031 0.302371
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.14422 7.42838 -1.231 0.218387
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.17364 7.85936 -0.531 0.595414
## bs(BMXBMI, df = 7)6:RIAGENDRMale -3.39482 8.46949 -0.401 0.688563
## bs(BMXBMI, df = 7)7:RIAGENDRMale -15.56371 10.04177 -1.550 0.121230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.036 on 4974 degrees of freedom
## Multiple R-squared: 0.8968, Adjusted R-squared: 0.8962
## F-statistic: 1728 on 25 and 4974 DF, p-value: < 2.2e-16

# grid.arrange(g1, g2,g3, nrow=3)